---
  title: "CARRS RDD analysis : By previous CMD diagnosis"
knit: (function(input, ...) {rmarkdown::render(input, output_file="")})
output: pdf_document
geometry: "left=2cm,right=2cm,top=1cm,bottom=1cm"
date: "`r format(Sys.time(), '%d %B, %Y')`"



---
  
```{r , setup, include=FALSE, eval = FALSE}
knitr::opts_chunk$set(
  comment = '', warning=FALSE,  message=FALSE, results= FALSE , fig.width = 9,fig.height = 7, echo=FALSE, root.dir =""
  
)

knitr::opts_knit$set(
  root.dir =""
)

```




```{r packages}
rm(list=ls())
library(tidyverse)
library(gridExtra)
library(dplyr)
library(rdrobust)
library(rddensity)
library(viridisLite)
library(foreign)
library(ggplot2)
library(data.table)
library(doBy)
library(socviz)
library(openxlsx) 
library(modelsummary)
library(labelled)
library(gtsummary)
library(gt)
library(kableExtra)
```

```{r loaddata}
# data for outcome: drug treatment
  data_diagnosis<-data.frame(read.csv("carrs_selected.csv"))
  data_diagnosis<-as.data.table(data_diagnosis)
 
# data for outcome: diagnosis
  data_treatment<-data.frame(read.csv("carrs_full.csv"))
  data_treatment<-as.data.table(data_treatment)
  
# data for outcomes: systolic and diastolic blood pressure
  CARRSbp <-as.data.table(read.csv("carrs_bp.csv"))
  CARRSbp <- na.omit(CARRSbp, cols=(c("sysdiff","diasdiff","edu")))
```

## binary outcomes
##calculating the bandwidth for each sub group

```{r RunningVariableAll, eval = TRUE}

rv.update = function(b,c) {
  
  out=c[anyknow==b,runningvar:=ifelse((((sys-140)/(sd(sys))) == ((dias-90)/(sd(dias)))),
                                            (sys-140)/(sd(sys)),
                                            ifelse((((sys-140)/(sd(sys))) > ((dias-90)/(sd(dias)))),((sys-140)/(sd(sys))),((dias-90)/(sd(dias)))))]
  
  
  a_b<<-out
  
}

rv.update(1,data_diagnosis)
diag_diayes<-a_b[anyknow==1,]
rm(a_b)

rv.update(0,data_diagnosis)
diag_diano<-a_b[anyknow==0,]
rm(a_b)


rv.update(1,data_treatment)
treat_diayes<-a_b[anyknow==1,]
rm(a_b)

rv.update(0,data_treatment)
treat_diano<-a_b[anyknow==0,]
rm(a_b)
```

```{r Running VariableBySex, eval = TRUE}
# by sex
rv.update = function(a,b,c) {
  
  out=c[female==a & anyknow==b,runningvar:=ifelse((((sys-140)/(sd(sys))) == ((dias-90)/(sd(dias)))),
                                            (sys-140)/(sd(sys)),
                                            ifelse((((sys-140)/(sd(sys))) > ((dias-90)/(sd(dias)))),((sys-140)/(sd(sys))),((dias-90)/(sd(dias)))))]
  
  
  a_b<<-out
  
}

# males
rv.update(0,1,data_diagnosis)
diag_diayesM<-a_b[anyknow==1 & female==0,]
rm(a_b)

rv.update(0,0,data_diagnosis)
diag_dianoM<-a_b[anyknow==0 & female==0,]
rm(a_b)


rv.update(0,1,data_treatment)
treat_diayesM<-a_b[anyknow==1 & female==0,]
rm(a_b)

rv.update(0,0,data_treatment)
treat_dianoM<-a_b[anyknow==0 & female==0,]
rm(a_b)

# females
rv.update(1,1,data_diagnosis)
diag_diayesF<-a_b[anyknow==1 & female==1,]
rm(a_b)

rv.update(1,0,data_diagnosis)
diag_dianoF<-a_b[anyknow==0 & female==1,]
rm(a_b)


rv.update(1,1,data_treatment)
treat_diayesF<-a_b[anyknow==1 & female==1,]
rm(a_b)

rv.update(1,0,data_treatment)
treat_dianoF<-a_b[anyknow==0 & female==1,]
rm(a_b)
```


# RDD function - binary outcomes

```{r RDfunction, eval = TRUE}
##Defining the common function##
carrs_rdd = function(a,b,d) {
  
  d$Y<-a
  d$X<-b
  
  
  out =  rdrobust(y = d$Y, d$X, kernel = "triangular", p = 1, covs = cbind(d$w01, d$w23, d$w45),
                  c = 0, cluster = d$pid, rho = 1 , all = T)
  summary(out)
  
  ret <- data.frame(
    diadiag = c(formatC(round(100*out$coef[1,1],2),2,format="f"), formatC(round(out$pv[3,1],2),2,format="f"), 
            paste0( "(", formatC(round(100*out$ci[3,1],2),2,format="f"), " - ", formatC(round(100*out$ci[3,2],2),2,format="f"), ")" ), 
            formatC(round(out$bws[1,1],2),2,format="f"), as.character(out$N_h[1] + out$N_h[2]))
  )
  rownames(ret) <- c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")
  
  output<<-ret %>% add_rownames  
}
```

##Calling the specific subroups and saving the output

```{r Diagnosis, eval = TRUE}
rdd = function(a) {
  
  carrs_rdd(a$hypdiag,a$runningvar,a)
  
}

rdd(diag_diayes)
diag_diayes <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(diag_diano)
diag_diano <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)

# males
rdd(diag_diayesM)
diag_diayesM <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(diag_dianoM)
diag_dianoM <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)

# females
rdd(diag_diayesF)
diag_diayesF <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(diag_dianoF)
diag_dianoF <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)
```

```{r Treatment, eval = TRUE}
rdd = function(a) {
  
  carrs_rdd(a$hypdrug,a$runningvar,a)
  
}

rdd(treat_diayes)
treat_diayes <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(treat_diano)
treat_diano <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)

# males

rdd(treat_diayesM)
treat_diayesM <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(treat_dianoM)
treat_dianoM <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)

# females
rdd(treat_diayesF)
treat_diayesF <-output%>%mutate(Dia=diadiag)%>%select(-diadiag)

rdd(treat_dianoF)
treat_dianoF <-output%>%mutate(NoDia=diadiag)%>%select(-diadiag)
```


```{r MergingResults, eval = TRUE}
diag_table<-merge(diag_diayes,diag_diano,by='rowname')
diag_table<-diag_table %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

treat_table<-merge(treat_diayes,treat_diano,by='rowname')
treat_table<-treat_table %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

# males
diag_tableM<-merge(diag_diayesM,diag_dianoM,by='rowname')
diag_tableM<-diag_tableM %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

treat_tableM<-merge(treat_diayesM,treat_dianoM,by='rowname')
treat_tableM<-treat_tableM %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

# females
diag_tableF<-merge(diag_diayesF,diag_dianoF,by='rowname')
diag_tableF<-diag_tableF %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

treat_tableF<-merge(treat_diayesF,treat_dianoF,by='rowname')
treat_tableF<-treat_tableF %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))
```

```{r Table, eval = TRUE}
#combining diagnosis and treatment
  filename <- paste0("prevdiagnosis_combined.tex")
  caption=paste0("Effect of home-based screening on hypertension diagnosis and treatment by previous diabetes, heart attack, or stroke diagnosis")

  comb_diag_table<-rbind(diag_table,diag_tableF,diag_tableM)
  comb_treat_table<-rbind(treat_table,treat_tableF,treat_tableM)
  
  comb_prevdiag_table <- cbind(comb_diag_table,comb_treat_table)
  comb_prevdiag_table <- comb_prevdiag_table[-c(4)]

combined<-kbl(comb_prevdiag_table,caption=caption, booktabs = T, linesep = '', escape = FALSE,align = "lccccc", format = "latex",col.names = c(" ","Previous diagnosis","No previous diagnosis","Previous diagnosis","No previous diagnosis"),digits = 2) %>%
    add_header_above(c(" ", "Diagnosis" = 2, "Treatment" = 2)) %>%
    kable_styling(latex_options = "HOLD_position", position = "center") %>%
    pack_rows("Panel A: Total", 1,5) %>%
    pack_rows("Estimation results", 1,3) %>%
    pack_rows("Bandwidth details", 4, 5) %>%
    pack_rows(" ", 6,10) %>%
    pack_rows("Panel B: Females", 6,10) %>%
    pack_rows("Estimation results", 6, 8) %>%
    pack_rows("Bandwidth details", 9, 10) %>%
    pack_rows(" ", 11,15) %>%
    pack_rows("Panel C: Males", 11,15) %>%
    pack_rows("Estimation results", 11, 13) %>%
    pack_rows("Bandwidth details", 14, 15) %>%
    footnote(general = c("Note: The model estimation included a local linear trend, triangular Kernel weights, and a data-driven bandwidth selection (mean squared error optimal bandwidth). The table displays the local average treatment effect in percentage points for individuals with a standardized blood pressure of zero and confidence intervals resulting from robust bias-corrected standard errors clustered at the individual-level.","Abbreviation: CI = confidence interval"),general_title="",threeparttable = T, fixed_small_size = T) %>%
save_kable(filename, format = "latex")
```

## continuous outcomes
##calculating the bandwidth for each sub group

```{r RunningVariableAllBP, eval = TRUE}
rv.update = function(b,c) {
  
  out=c[anyknow==b,runningvar:=ifelse((((sys-140)/(sd(sys))) == ((dias-90)/(sd(dias)))),
                                            (sys-140)/(sd(sys)),
                                            ifelse((((sys-140)/(sd(sys))) > ((dias-90)/(sd(dias)))),((sys-140)/(sd(sys))),((dias-90)/(sd(dias)))))]
  
  
  a_b<<-out
  
}


rv.update(1,CARRSbp)
bp_diayes<-a_b[anyknow==1,]
rm(a_b)

rv.update(0,CARRSbp)
bp_diano<-a_b[anyknow==0,]
rm(a_b)
```

```{r Running VariableBySexBP, eval = TRUE}
# by sex
rv.update = function(a,b,c) {
  
  out=c[female==a & anyknow==b,runningvar:=ifelse((((sys-140)/(sd(sys))) == ((dias-90)/(sd(dias)))),
                                            (sys-140)/(sd(sys)),
                                            ifelse((((sys-140)/(sd(sys))) > ((dias-90)/(sd(dias)))),((sys-140)/(sd(sys))),((dias-90)/(sd(dias)))))]
  a_b<<-out
}

# males
rv.update(0,1,CARRSbp)
bp_diayesM<-a_b[anyknow==1 & female==0,]
rm(a_b)

rv.update(0,0,CARRSbp)
bp_dianoM<-a_b[anyknow==0 & female==0,]
rm(a_b)

# females
rv.update(1,1,CARRSbp)
bp_diayesF<-a_b[anyknow==1 & female==1,]
rm(a_b)

rv.update(1,0,CARRSbp)
bp_dianoF<-a_b[anyknow==0 & female==1,]
rm(a_b)
```

# RDD function - continuous outcomes

```{r RDfunctionBP, eval = TRUE}
##Defining the common function##
carrs_rddBP = function(a,b,d) {
  
  d$Y<-a
  d$X<-b
  
  
  out =  rdrobust(y = d$Y, d$X, kernel = "triangular", p = 1, covs = cbind(d$w02, d$w24),
                  c = 0, cluster = d$pid, rho = 1 , all = T)
  summary(out)
  
  ret <- data.frame(
    diadiagBP = c(formatC(round(out$coef[1,1],2),2,format="f"), formatC(round(out$pv[3,1],2),2,format="f"), 
            paste0( "(", formatC(round(out$ci[3,1],2),2,format="f"), " - ", formatC(round(out$ci[3,2],2),2,format="f"), ")" ), 
            formatC(round(out$bws[1,1],2),2,format="f"), formatC(out$N_h[1] + out$N_h[2]))
  )
  rownames(ret) <- c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")
  
  output<<-ret %>% add_rownames  
}
```

##Calling the specific subroups and saving the output

```{r Systolic, eval = TRUE}
rddBP = function(a) {
  
  carrs_rddBP(a$sysdiff ,a$runningvar,a)
  
}

rddBP(bp_diayes)
sys_diayes <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_diano)
sys_diano <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)

# males
rddBP(bp_diayesM)
sys_diayesM <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_dianoM)
sys_dianoM <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)

# females
rddBP(bp_diayesF)
sys_diayesF <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_dianoF)
sys_dianoF <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)
```

```{r Diastolic, eval = TRUE}
rddBP = function(a) {
  
  carrs_rddBP(a$diasdiff ,a$runningvar,a)
  
}

rddBP(bp_diayes)
dias_diayes <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_diano)
dias_diano <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)

# males
rddBP(bp_diayesM)
dias_diayesM <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_dianoM)
dias_dianoM <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)

# females
rddBP(bp_diayesF)
dias_diayesF <-output%>%mutate(Dia=diadiagBP)%>%select(-diadiagBP)

rddBP(bp_dianoF)
dias_dianoF <-output%>%mutate(NoDia=diadiagBP)%>%select(-diadiagBP)
```

```{r MergingResultsBP, eval = TRUE}
sys_table<-merge(sys_diayes,sys_diano,by='rowname')
sys_table<-sys_table %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

dias_table<-merge(dias_diayes,dias_diano,by='rowname')
dias_table<-dias_table %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

# males
sys_tableM<-merge(sys_diayesM,sys_dianoM,by='rowname')
sys_tableM<-sys_tableM %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

dias_tableM<-merge(dias_diayesM,dias_dianoM,by='rowname')
dias_tableM<-dias_tableM %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

# females
sys_tableF<-merge(sys_diayesF,sys_dianoF,by='rowname')
sys_tableF<-sys_tableF %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))

dias_tableF<-merge(dias_diayesF,dias_dianoF,by='rowname')
dias_tableF<-dias_tableF %>% arrange(match(rowname, c("Coefficient", "p-value","95\\% CI", "Bandwidth", "Obs. within bandwidth")))
```

```{r TableBP, eval = TRUE}
#combining systolic and diastolic
  filename <- paste0("prevdiagnosis_combined_bpdiff.tex")
  caption=paste0("Effect of home-based screening on systolic and diastolic blood pressure by previous diabetes, heart attack, or stroke diagnosis")

  comb_sys_table<-rbind(sys_table,sys_tableF,sys_tableM)
  comb_dias_table<-rbind(dias_table,dias_tableF,dias_tableM)
  
  comb_prevdiag_table <- cbind(comb_sys_table,comb_dias_table)
  comb_prevdiag_table <- comb_prevdiag_table[-c(4)]

combined<-kbl(comb_prevdiag_table,caption=caption, booktabs = T, linesep = '', escape = FALSE,align = "lccccc", format = "latex",col.names = c(" ","Previous diagnosis","No previous diagnosis","Previous diagnosis","No previous diagnosis"),digits = 2) %>%
    add_header_above(c(" ", "Systolic" = 2, "Diastolic" = 2)) %>%
    kable_styling(latex_options = "HOLD_position", position = "center") %>%
    pack_rows("Panel A: Total", 1,5) %>%
    pack_rows("Estimation results", 1,3) %>%
    pack_rows("Bandwidth details", 4, 5) %>%
    pack_rows(" ", 6,10) %>%
    pack_rows("Panel B: Females", 6,10) %>%
    pack_rows("Estimation results", 6, 8) %>%
    pack_rows("Bandwidth details", 9, 10) %>%
    pack_rows(" ", 11,15) %>%
    pack_rows("Panel C: Males", 11,15) %>%
    pack_rows("Estimation results", 11, 13) %>%
    pack_rows("Bandwidth details", 14, 15) %>%
    footnote(general = c("Note: The model estimation included a local linear trend, triangular Kernel weights, and a data-driven bandwidth selection (mean squared error optimal bandwidth). The table displays the local average treatment effect in percentage points for individuals with a standardized blood pressure of zero and confidence intervals resulting from robust bias-corrected standard errors clustered at the individual-level.","Abbreviation: CI = confidence interval"),general_title="",threeparttable = T, fixed_small_size = T) %>%
save_kable(filename, format = "latex")
```



